# Replication Archive for: 
# Coppock, Alexander and Donald P. Green. 2020. 
# "Do Belief Systems Exhibit Dynamic Constraint?" 
# The Journal of Politics, Forthcoming.

# Start from an empty workspace. The within() blocks further down resolve any
# bare name that is not a column of the data frame in the calling environment,
# so objects left over from an earlier session could otherwise be picked up
# silently. This removes objects only; attached packages stay attached.
rm(list = ls())

library(tidyverse)
library(psych)

# Load deduped, anonymized datasets

study_1_elite_w1 <- read_csv("data/raw/study_1_elite_wave_1_raw.csv")
study_1_elite_w2 <- read_csv("data/raw/study_1_elite_wave_2_raw.csv")

# Load factor analysis models
# load() places whatever the file contains directly into the workspace and
# returns the names it created. The script later uses the five fitted models
# and the thresholds table, so fail here, with a clear message, if any of
# them is absent instead of failing much later inside a within() block.
loaded_objects <- load("data/raw/factor_analysis_models.rdata")
required_objects <- c(
  "fa_wall", "fa_amtrak", "fa_vets", "fa_flat", "fa_mj", "medians_df"
)
missing_objects <- setdiff(required_objects, loaded_objects)
if (length(missing_objects) > 0) {
  stop(
    "data/raw/factor_analysis_models.rdata does not contain: ",
    paste(missing_objects, collapse = ", "),
    call. = FALSE
  )
}

# Load Mturk dataset
# Used below as the reference sample for the standard deviations that scale
# the elite outcomes.
study_1_mturk <- read_rds("data/clean/study_1_mturk_cleaned.rds")


# Wave 1 cleaning ---------------------------------------------------------

# Translate numeric survey codes into text labels.
#
#   x      vector of codes (numeric, or character holding the same numbers)
#   labels character vector of labels, each named by the code it stands for
#
# Returns a character vector the same length as `x`. Codes that have no entry
# in `labels` are returned as their character representation; NA stays NA.
#
# This replaces calls of the form recode(x, "1='A';2='B'"). That specification
# string is car::recode() syntax, but this script only attaches tidyverse and
# psych, so `recode` resolved to dplyr::recode(), which does not parse it.
label_codes <- function(x, labels) {
  out <- as.character(x)
  hit <- match(out, names(labels))
  found <- !is.na(hit)
  out[found] <- unname(labels[hit[found]])
  out
}

study_1_elite_w1 <- within(study_1_elite_w1, {

  # Covariates

  # Four-way typology from crossing ROG_1 with ROG_2; any other combination
  # (including missing answers) stays NA.
  rog_type <- rep(NA_character_, nrow(study_1_elite_w1))
  rog_type[ROG_1 == 1 & ROG_2 == 1] <- "Conservative"
  rog_type[ROG_1 == 1 & ROG_2 == 2] <- "Libertarian"
  rog_type[ROG_1 == 2 & ROG_2 == 1] <- "Communitarian"
  rog_type[ROG_1 == 2 & ROG_2 == 2] <- "Liberal"

  # Seven-point party ID built from the pid_3 stem and its follow-up items.
  # pid_3 == 1 uses pid_dem; a missing or empty follow-up is coded 2.
  pid_7 <- rep(NA_real_, nrow(study_1_elite_w1))
  pid_7[pid_3 == 1 & pid_dem == 1] <- 1
  pid_7[pid_3 == 1 & pid_dem == 2] <- 2
  pid_7[pid_3 == 1 & is.na(pid_dem)] <- 2
  pid_7[pid_3 == 1 & pid_dem == ""] <- 2

  pid_7[pid_3 == 4] <- 4

  # pid_3 == 2 uses pid_ind; a missing follow-up is coded 4.
  pid_7[pid_3 == 2 & pid_ind == 3] <- 3
  pid_7[pid_3 == 2 & pid_ind == 4] <- 4
  pid_7[pid_3 == 2 & is.na(pid_ind)] <- 4
  pid_7[pid_3 == 2 & pid_ind == 5] <- 5

  # pid_3 == 3 uses pid_rep; a missing follow-up is coded 6.
  pid_7[pid_3 == 3 & is.na(pid_rep)] <- 6
  pid_7[pid_3 == 3 & pid_rep == 6] <- 6
  pid_7[pid_3 == 3 & pid_rep == 7] <- 7
  # NOTE(review): everyone still unclassified is assigned 1, which the next
  # step labels "Democrat" -- confirm this imputation is intended.
  pid_7[is.na(pid_7)] <- 1

  pid_3_cat <- label_codes(
    pid_7,
    c(
      "1" = "Democrat", "2" = "Democrat", "3" = "Democrat",
      "4" = "Independent",
      "5" = "Republican", "6" = "Republican", "7" = "Republican"
    )
  )

  # Ideology: codes 1 and 9 are both "Liberal", 5 and 6 both "Other"; missing
  # answers are also put in "Other". Any code without a label becomes NA when
  # the factor is built because it is not one of the declared levels.
  ideo_5 <- label_codes(
    ideology,
    c(
      "1" = "Liberal", "9" = "Liberal",
      "2" = "Moderate",
      "3" = "Conservative",
      "4" = "Libertarian",
      "5" = "Other", "6" = "Other"
    )
  )
  ideo_5[is.na(ideo_5)] <- "Other"
  ideo_5 <- factor(ideo_5, levels = c("Liberal", "Moderate", "Libertarian", "Conservative", "Other"))

  # Education: missing answers are set to 3 ("Some College").
  educ_5 <- as.numeric(education)
  educ_5[is.na(education)] <- 3

  educ_5 <- factor(educ_5, levels = 1:5, labels = c("Less than High School", "High School", "Some College", "College", "Graduate School"))

  # Race: codes 4 through 7 are pooled into "Other"; anything else stays NA.
  race_4 <- rep(NA_character_, nrow(study_1_elite_w1))
  race_4[race == 1] <- "White"
  race_4[race == 2] <- "Black"
  race_4[race == 3] <- "Hispanic"
  race_4[race %in% 4:7] <- "Other"

  race_4 <- factor(race_4, levels = c("Black", "Hispanic", "White", "Other"))

  # Indicator for gender == 2; missing gender is coded 0.
  female <- ifelse(gender == 2, 1, 0)
  female[is.na(female)] <- 0

  # Age bracket: raw code 9 is treated as 4 and raw code 8 as 7, then every
  # code is shifted down by 2 to land on 1..5. Missing answers get 5 ("60+").
  age_5 <- as.numeric(age)
  age_5[age == 9] <- 4
  age_5[age == 8] <- 7
  age_5 <- age_5 - 2
  age_5[is.na(age_5)] <- 5

  age_5 <- factor(age_5, levels = 1:5, labels = c("18 - 29", "30 - 39", "40 - 49", "50 - 59", "60+"))

  # Professional background and media habits (Q51-Q58)
  years_in_profession <- label_codes(
    Q51,
    c(
      "1" = "Less than 1 year",
      "2" = "1-2 years",
      "3" = "3-4 years",
      "4" = "5-9 years",
      "5" = "10-15 years",
      "6" = "More than 15 years"
    )
  )
  type_of_profession <- label_codes(
    Q52,
    c(
      "1" = "Business",
      "2" = "Non-profit",
      "3" = "Government",
      "5" = "Higher Education/Academia",
      "6" = "Research",
      "7" = "Research",
      "8" = "Other"
    )
  )
  level_of_profession <- label_codes(
    Q53,
    c(
      "1" = "Executive Level",
      "2" = "Entry Level",
      "4" = "Mid-Level",
      "5" = "Senior Level",
      "6" = "Other"
    )
  )
  # The column name keeps its original spelling because it is selected by
  # that name after this block.
  published_opinon <- label_codes(Q54, c("1" = "Yes", "2" = "No"))
  news_how_often <- label_codes(
    Q55,
    c(
      "1" = "Several times a day",
      "2" = "Once a day",
      "3" = "Several times a week",
      "4" = "Several times a month",
      "6" = "Never",
      "7" = "Several times an hour"
    )
  )
  talk_politics_how_often <- label_codes(
    Q56,
    c(
      "1" = "Several times a day",
      "2" = "Once a day",
      "3" = "Several times a week",
      "4" = "Several times a month",
      "6" = "Never"
    )
  )
  asked_about_politics_how_often <- label_codes(
    Q58,
    c("1" = "Often", "2" = "Occasionally", "3" = "Rarely", "4" = "Never")
  )

  # Treatment assignment as a factor with "control" as the reference level
  Z <- relevel(factor(Z), ref = "control")

  # Recode outcome measures so larger numbers indicate more agreement with
  # the author. Reversed items are flipped with 8 - x or 101 - x.
  # NOTE(review): presumably 7-point and 0-100 response scales -- confirm.
  # The order of the dv_* assignments is kept as is, since it determines the
  # order of the new columns.

  dv_wall_1_w1 <- as.numeric(wall_1_1)
  dv_wall_2_w1 <- -1 * as.numeric(wall_2_1) + 101
  dv_wall_3_w1 <- -1 * as.numeric(wall_3_1) + 8
  dv_wall_4_w1 <- as.numeric(wall_4_1)

  # The raw coding of amtrak_1_1 is missing a 5, so raw codes 6, 7 and 8 are
  # shifted down by one (hard-bracket subsetting on both sides).
  dv_amtrak_1_w1 <- as.numeric(amtrak_1_1)
  dv_amtrak_1_w1[as.numeric(amtrak_1_1) %in% c(6, 7, 8)] <-
    dv_amtrak_1_w1[as.numeric(amtrak_1_1) %in% c(6, 7, 8)] - 1
  dv_amtrak_2_w1 <- as.numeric(amtrak_2_1)
  dv_amtrak_3_w1 <- -1 * as.numeric(amtrak_3_1) + 8
  dv_amtrak_4_w1 <- as.numeric(amtrak_4_1)

  dv_vets_1_w1 <- -1 * as.numeric(vets_1_1) + 101
  dv_vets_2_w1 <- as.numeric(vets_2_1)
  dv_vets_3_w1 <- -1 * as.numeric(vets_3_1) + 8
  dv_vets_4_w1 <- as.numeric(vets_4_1)

  dv_flat_1_w1 <- -1 * as.numeric(flat_1_1) + 8
  dv_flat_2_w1 <- -1 * as.numeric(flat_2_1) + 101
  dv_flat_3_w1 <- -1 * as.numeric(flat_3_1) + 8
  dv_flat_4_w1 <- -1 * as.numeric(flat_4_1) + 8

  # One item per topic is designated the "main" outcome
  dv_wall_main_w1 <- dv_wall_3_w1
  dv_amtrak_main_w1 <- dv_amtrak_1_w1
  dv_vets_main_w1 <- dv_vets_2_w1
  dv_flat_main_w1 <- dv_flat_1_w1

})


# Reduce wave 1 to the identifier, the covariates built above, the treatment
# indicator and every recoded outcome (columns whose name starts with "dv_").
study_1_elite_w1 <- select(
  study_1_elite_w1,
  # identifiers and survey metadata
  subject_id, elite_type, time_spent_reading,
  # political and demographic covariates
  rog_type, pid_7, pid_3_cat, ideo_5, educ_5, race_4, female, age_5,
  # professional background and media habits
  years_in_profession, type_of_profession, level_of_profession,
  published_opinon, news_how_often, talk_politics_how_often,
  asked_about_politics_how_often,
  # treatment assignment
  Z,
  # outcomes
  starts_with("dv_")
)

# Wave 2 cleaning ---------------------------------------------------------

# Recode the wave-2 outcomes. Items kept as asked are only converted to
# numeric; reversed items are flipped by subtracting from 8 or from 101.
# NOTE(review): presumably 7-point and 0-100 response scales -- confirm.
# Statement order is left untouched because it determines the order of the
# new columns.
study_1_elite_w2 <- within(study_1_elite_w2, {

  # Wall items
  dv_wall_1_w2 <- as.numeric(wall_1_1)
  dv_wall_2_w2 <- 101 - as.numeric(wall_2_1)
  dv_wall_3_w2 <- 8 - as.numeric(wall_3_1)
  dv_wall_4_w2 <- as.numeric(wall_4_1)

  # Amtrak items. Raw codes 6, 7 and 8 of the first item are moved down by
  # one; subtracting the logical mask subtracts 1 exactly where it is TRUE.
  dv_amtrak_1_w2 <- as.numeric(amtrak_1_1)
  dv_amtrak_1_w2 <- dv_amtrak_1_w2 - (dv_amtrak_1_w2 %in% 6:8)
  dv_amtrak_2_w2 <- as.numeric(amtrak_2_1)
  dv_amtrak_3_w2 <- 8 - as.numeric(amtrak_3_1)
  dv_amtrak_4_w2 <- as.numeric(amtrak_4_1)

  # Vets items
  dv_vets_1_w2 <- 101 - as.numeric(vets_1_1)
  dv_vets_2_w2 <- as.numeric(vets_2_1)
  dv_vets_3_w2 <- 8 - as.numeric(vets_3_1)
  dv_vets_4_w2 <- as.numeric(vets_4_1)

  # Flat items (all four are reversed)
  dv_flat_1_w2 <- 8 - as.numeric(flat_1_1)
  dv_flat_2_w2 <- 101 - as.numeric(flat_2_1)
  dv_flat_3_w2 <- 8 - as.numeric(flat_3_1)
  dv_flat_4_w2 <- 8 - as.numeric(flat_4_1)

  # One item per topic is designated the "main" outcome
  dv_wall_main_w2 <- dv_wall_3_w2
  dv_amtrak_main_w2 <- dv_amtrak_1_w2
  dv_vets_main_w2 <- dv_vets_2_w2
  dv_flat_main_w2 <- dv_flat_1_w2

  # Distraction randomization, stored as a number
  Z_distract <- as.numeric(Z_distract)

  # mj items (all four are reversed).
  # NOTE(review): items 3 and 4 are read from columns ending in "_2" while
  # items 1 and 2 end in "_1" -- confirm against the raw export.
  dv_mj_1_w2 <- 8 - as.numeric(mj_1_1)
  dv_mj_2_w2 <- 8 - as.numeric(mj_2_1)
  dv_mj_3_w2 <- 8 - as.numeric(mj_3_2)
  dv_mj_4_w2 <- 8 - as.numeric(mj_4_2)

})

# Reduce wave 2 to the identifier, the distraction assignment and the
# recoded outcomes.
study_1_elite_w2 <- select(
  study_1_elite_w2,
  subject_id,
  Z_distract,
  starts_with("dv_")
)


# Merge datasets ----------------------------------------------------------

# Attach the wave-2 columns to every wave-1 respondent; respondents without a
# wave-2 row get NA in those columns. The key is named explicitly so the join
# can never silently pick up an additional shared column, and so dplyr does
# not have to guess (and announce) the join columns.
study_1_elite <-
  study_1_elite_w1 %>%
  left_join(study_1_elite_w2, by = "subject_id")


# Standardize Outcomes ----------------------------------------------------


# Standard deviation of one MTurk column among rows with Z == "control",
# ignoring missing values.
mturk_control_sd <- function(item) {
  sd(study_1_mturk[[item]][study_1_mturk$Z == "control"], na.rm = TRUE)
}

# Standard deviation of one MTurk column among rows with Z_distract == 0,
# ignoring missing values.
mturk_distract_0_sd <- function(item) {
  sd(study_1_mturk[[item]][study_1_mturk$Z_distract == 0], na.rm = TRUE)
}

# Divide every elite outcome by the matching MTurk standard deviation.
# Wave-1 AND wave-2 versions of the wall/amtrak/vets/flat items are both
# scaled by the MTurk wave-1 control-group SD of the same item; the mj items
# are scaled by the MTurk wave-2 SD among Z_distract == 0.
# Statement order is left untouched because it determines column order.
study_1_elite <- within(study_1_elite, {

  # Wave 1
  dv_wall_1_s_w1 <- dv_wall_1_w1 / mturk_control_sd("dv_wall_1_w1")
  dv_wall_2_s_w1 <- dv_wall_2_w1 / mturk_control_sd("dv_wall_2_w1")
  dv_wall_3_s_w1 <- dv_wall_3_w1 / mturk_control_sd("dv_wall_3_w1")
  dv_wall_4_s_w1 <- dv_wall_4_w1 / mturk_control_sd("dv_wall_4_w1")
  dv_amtrak_1_s_w1 <- dv_amtrak_1_w1 / mturk_control_sd("dv_amtrak_1_w1")
  dv_amtrak_2_s_w1 <- dv_amtrak_2_w1 / mturk_control_sd("dv_amtrak_2_w1")
  dv_amtrak_3_s_w1 <- dv_amtrak_3_w1 / mturk_control_sd("dv_amtrak_3_w1")
  dv_amtrak_4_s_w1 <- dv_amtrak_4_w1 / mturk_control_sd("dv_amtrak_4_w1")
  dv_vets_1_s_w1 <- dv_vets_1_w1 / mturk_control_sd("dv_vets_1_w1")
  dv_vets_2_s_w1 <- dv_vets_2_w1 / mturk_control_sd("dv_vets_2_w1")
  dv_vets_3_s_w1 <- dv_vets_3_w1 / mturk_control_sd("dv_vets_3_w1")
  dv_vets_4_s_w1 <- dv_vets_4_w1 / mturk_control_sd("dv_vets_4_w1")
  dv_flat_1_s_w1 <- dv_flat_1_w1 / mturk_control_sd("dv_flat_1_w1")
  dv_flat_2_s_w1 <- dv_flat_2_w1 / mturk_control_sd("dv_flat_2_w1")
  dv_flat_3_s_w1 <- dv_flat_3_w1 / mturk_control_sd("dv_flat_3_w1")
  dv_flat_4_s_w1 <- dv_flat_4_w1 / mturk_control_sd("dv_flat_4_w1")
  dv_wall_main_s_w1 <- dv_wall_3_s_w1
  dv_amtrak_main_s_w1 <- dv_amtrak_1_s_w1
  dv_vets_main_s_w1 <- dv_vets_2_s_w1
  dv_flat_main_s_w1 <- dv_flat_1_s_w1

  # Wave 2 (same divisors as wave 1)
  dv_wall_1_s_w2 <- dv_wall_1_w2 / mturk_control_sd("dv_wall_1_w1")
  dv_wall_2_s_w2 <- dv_wall_2_w2 / mturk_control_sd("dv_wall_2_w1")
  dv_wall_3_s_w2 <- dv_wall_3_w2 / mturk_control_sd("dv_wall_3_w1")
  dv_wall_4_s_w2 <- dv_wall_4_w2 / mturk_control_sd("dv_wall_4_w1")
  dv_amtrak_1_s_w2 <- dv_amtrak_1_w2 / mturk_control_sd("dv_amtrak_1_w1")
  dv_amtrak_2_s_w2 <- dv_amtrak_2_w2 / mturk_control_sd("dv_amtrak_2_w1")
  dv_amtrak_3_s_w2 <- dv_amtrak_3_w2 / mturk_control_sd("dv_amtrak_3_w1")
  dv_amtrak_4_s_w2 <- dv_amtrak_4_w2 / mturk_control_sd("dv_amtrak_4_w1")
  dv_vets_1_s_w2 <- dv_vets_1_w2 / mturk_control_sd("dv_vets_1_w1")
  dv_vets_2_s_w2 <- dv_vets_2_w2 / mturk_control_sd("dv_vets_2_w1")
  dv_vets_3_s_w2 <- dv_vets_3_w2 / mturk_control_sd("dv_vets_3_w1")
  dv_vets_4_s_w2 <- dv_vets_4_w2 / mturk_control_sd("dv_vets_4_w1")
  dv_flat_1_s_w2 <- dv_flat_1_w2 / mturk_control_sd("dv_flat_1_w1")
  dv_flat_2_s_w2 <- dv_flat_2_w2 / mturk_control_sd("dv_flat_2_w1")
  dv_flat_3_s_w2 <- dv_flat_3_w2 / mturk_control_sd("dv_flat_3_w1")
  dv_flat_4_s_w2 <- dv_flat_4_w2 / mturk_control_sd("dv_flat_4_w1")

  # mj items exist in wave 2 only
  dv_mj_1_s_w2 <- dv_mj_1_w2 / mturk_distract_0_sd("dv_mj_1_w2")
  dv_mj_2_s_w2 <- dv_mj_2_w2 / mturk_distract_0_sd("dv_mj_2_w2")
  dv_mj_3_s_w2 <- dv_mj_3_w2 / mturk_distract_0_sd("dv_mj_3_w2")
  dv_mj_4_s_w2 <- dv_mj_4_w2 / mturk_distract_0_sd("dv_mj_4_w2")

  dv_wall_main_s_w2 <- dv_wall_3_s_w2
  dv_amtrak_main_s_w2 <- dv_amtrak_1_s_w2
  dv_vets_main_s_w2 <- dv_vets_2_s_w2
  dv_flat_main_s_w2 <- dv_flat_1_s_w2

})

# Score one topic in one wave with a previously fitted factor model: hand the
# four standardized items dv_<topic>_1..4_s_<wave> to predict() and keep the
# first column of what it returns.
factor_score <- function(model, topic, wave) {
  item_cols <- paste0("dv_", topic, "_", 1:4, "_s_", wave)
  predict(model, data = study_1_elite[item_cols])[, 1]
}

# The same fitted model is applied to both waves of a topic.
# Statement order is left untouched because it determines column order.
study_1_elite <- within(study_1_elite, {
  dv_wall_scale_w1 <- factor_score(fa_wall, "wall", "w1")
  dv_amtrak_scale_w1 <- factor_score(fa_amtrak, "amtrak", "w1")
  dv_vets_scale_w1 <- factor_score(fa_vets, "vets", "w1")
  dv_flat_scale_w1 <- factor_score(fa_flat, "flat", "w1")

  dv_wall_scale_w2 <- factor_score(fa_wall, "wall", "w2")
  dv_amtrak_scale_w2 <- factor_score(fa_amtrak, "amtrak", "w2")
  dv_vets_scale_w2 <- factor_score(fa_vets, "vets", "w2")
  dv_flat_scale_w2 <- factor_score(fa_flat, "flat", "w2")
  dv_mj_scale_w2 <- factor_score(fa_mj, "mj", "w2")
})



# 1 when `score` is at or above the threshold stored in column `cutoff` of
# medians_df, 0 when it is below; NA scores stay NA.
meets_cutoff <- function(score, cutoff) {
  as.numeric(score >= medians_df[[cutoff]])
}

# Binary "agree" versions of every scale, cut at three different thresholds
# taken from medians_df (the *_median, *_25 and *_75 columns).
# Statement order is left untouched because it determines column order.
study_1_elite <- within(study_1_elite, {

  # Cut at the *_median thresholds
  dv_wall_agree_w1 <- meets_cutoff(dv_wall_scale_w1, "wall_median")
  dv_amtrak_agree_w1 <- meets_cutoff(dv_amtrak_scale_w1, "amtrak_median")
  dv_vets_agree_w1 <- meets_cutoff(dv_vets_scale_w1, "vets_median")
  dv_flat_agree_w1 <- meets_cutoff(dv_flat_scale_w1, "flat_median")
  dv_wall_agree_w2 <- meets_cutoff(dv_wall_scale_w2, "wall_median")
  dv_amtrak_agree_w2 <- meets_cutoff(dv_amtrak_scale_w2, "amtrak_median")
  dv_vets_agree_w2 <- meets_cutoff(dv_vets_scale_w2, "vets_median")
  dv_flat_agree_w2 <- meets_cutoff(dv_flat_scale_w2, "flat_median")
  dv_mj_agree_w2 <- meets_cutoff(dv_mj_scale_w2, "mj_median")

  # Cut at the *_25 thresholds
  dv_wall_agree_25_w1 <- meets_cutoff(dv_wall_scale_w1, "wall_25")
  dv_amtrak_agree_25_w1 <- meets_cutoff(dv_amtrak_scale_w1, "amtrak_25")
  dv_vets_agree_25_w1 <- meets_cutoff(dv_vets_scale_w1, "vets_25")
  dv_flat_agree_25_w1 <- meets_cutoff(dv_flat_scale_w1, "flat_25")
  dv_wall_agree_25_w2 <- meets_cutoff(dv_wall_scale_w2, "wall_25")
  dv_amtrak_agree_25_w2 <- meets_cutoff(dv_amtrak_scale_w2, "amtrak_25")
  dv_vets_agree_25_w2 <- meets_cutoff(dv_vets_scale_w2, "vets_25")
  dv_flat_agree_25_w2 <- meets_cutoff(dv_flat_scale_w2, "flat_25")
  dv_mj_agree_25_w2 <- meets_cutoff(dv_mj_scale_w2, "mj_25")

  # Cut at the *_75 thresholds
  dv_wall_agree_75_w1 <- meets_cutoff(dv_wall_scale_w1, "wall_75")
  dv_amtrak_agree_75_w1 <- meets_cutoff(dv_amtrak_scale_w1, "amtrak_75")
  dv_vets_agree_75_w1 <- meets_cutoff(dv_vets_scale_w1, "vets_75")
  dv_flat_agree_75_w1 <- meets_cutoff(dv_flat_scale_w1, "flat_75")
  dv_wall_agree_75_w2 <- meets_cutoff(dv_wall_scale_w2, "wall_75")
  dv_amtrak_agree_75_w2 <- meets_cutoff(dv_amtrak_scale_w2, "amtrak_75")
  dv_vets_agree_75_w2 <- meets_cutoff(dv_vets_scale_w2, "vets_75")
  dv_flat_agree_75_w2 <- meets_cutoff(dv_flat_scale_w2, "flat_75")
  dv_mj_agree_75_w2 <- meets_cutoff(dv_mj_scale_w2, "mj_75")


})

# Flag respondents per wave: TRUE only when none of the four topic scales
# (wall, amtrak, vets, flat) is missing in that wave. The mj scale does not
# enter the wave-2 flag.
study_1_elite <- within(study_1_elite, {
  responded_w1 <- !(
    is.na(dv_wall_scale_w1) |
      is.na(dv_amtrak_scale_w1) |
      is.na(dv_vets_scale_w1) |
      is.na(dv_flat_scale_w1)
  )
  responded_w2 <- !(
    is.na(dv_wall_scale_w2) |
      is.na(dv_amtrak_scale_w2) |
      is.na(dv_vets_scale_w2) |
      is.na(dv_flat_scale_w2)
  )
})

# Keep only the rows flagged responded_w1; wave-2 non-responders stay in the
# data and are identified by responded_w2.
study_1_elite <- filter(study_1_elite, responded_w1)

# The destination is passed by position: readr called this argument `path`
# before version 1.4.0 and calls it `file` since, with `path =` deprecated.
# A positional argument works with both and avoids the deprecation warning.
write_rds(study_1_elite, "data/clean/study_1_elite_cleaned.rds")
